* ---- session setup -------------------------------------------------
* Start from an empty session, disable output paging, and raise the
* variable limit to 10000 before any data are loaded.
clear all
set more off
set maxvar 10000

* Working directory: type the project folder path after `cd` before running.
cd


* The block below only needs to be run once ------------ (commented out; not run)
* ---------- extract additional information from the original data
*use "GSS7212_R2.DTA", clear
*egen n_biokid = anycount(kdrel*), values(1)
*egen n_nonbiokid = anycount(kdrel*), values(2 3)
*egen n_kid = rownonmiss(kdrel*)
*
*gen c_married = (marital==1) if ~missing(marital)
*gen c_single = (marital==5) if ~missing(marital)
*
*gen dataset = "GSS"
*sort year id
*gen pid = _n 
*keep pid year id dataset agekdbrn n_biokid n_nonbiokid n_kid kdrel* kdsex* kdyrbrn* kdalive* kdeduc* c_married c_single marital
*save gss_bio_data, replace 
*
*use "essw1_w6.dta", clear
*gen id = idno
*keep id essround cntry cldnhh cldnhhn cldnhhg clnhhbo
*gen dataset = "ESS" 
*sort essround cntry id
*gen pid = _n 
*save ess_havekid, replace 
*
** ---------- merge new data into the existing data sets
*use daughter_analytic, clear
*merge 1:1 pid id year dataset using gss_bio_data, gen(m_bio)
*merge 1:1 pid id essround cntry using ess_havekid, gen(m_noncohab)
*saveold daughter_analytic2, replace version(12)


* ---------- load the merged analytic file
use daughter_analytic2, clear

* ---------- variable respecification following the prior codes
* Top-code c_educ at 20; missing values are left missing.
replace c_educ = 20 if !missing(c_educ) & c_educ > 20

* ----------  Table 1. comparison btw "bio" sample and "analytic" sample
* check the difference btw # of childs / # of bio/adopted kids in the child roster

* b_sample: defined for year 1994 only; 1 when fx_bdaughter is observed, else 0.
gen b_sample = !missing(fx_bdaughter) if year == 1994

* a_sample: defined for year 1994 rows with fx_daughter observed;
* 1 when out_sample == 100, else 0.
gen a_sample = (out_sample == 100) if year == 1994 & !missing(fx_daughter)

* nonbio: 1 when n_nonbiokid is positive, 0 when it is zero or negative,
* missing when n_nonbiokid is missing.
gen nonbio = (n_nonbiokid > 0) if !missing(n_nonbiokid)

* s_sample (only for b_sample == 1):
*   0 = a_sample missing (or negative)
*   1 = a_sample in [0, 1)
*   2 = a_sample >= 1
gen s_sample = cond(missing(a_sample), 0, ///
               cond(a_sample >= 1, 2, ///
               cond(a_sample >= 0, 1, 0))) if b_sample == 1

* f_mismatch: 1 when the two first-daughter indicators disagree; defined only
* when both are observed.
gen f_mismatch = (fx_bdaughter != fx_daughter) if !missing(fx_bdaughter, fx_daughter)


* --------------- table 1
* Builds a 7-row summary table in the first seven observations of the data
* (so the data set must contain at least 7 observations):
*   v_name     row label
*   v_sample0  statistics over observations with s_sample >= 0
*   v_sample1  statistics over observations with s_sample >= 1
*   v_sample2  statistics over observations with s_sample >= 2
* and exports the labelled rows to tab1_measurement_error.csv.
gen v_name = ""
gen v_sample0 = .
gen v_sample1 = .
gen v_sample2 = .

* Row labels, listed in the same order in which the loop below fills the rows.
* (Previously "% mismatch" was labelled in row 7 while its value was written
* to row 4, which shifted the labels of rows 4-7 against their values.)
replace v_name = "Sample Size" in 1
replace v_name = "% first biological daughter" in 2
replace v_name = "% first cohab daughter" in 3
replace v_name = "% mismatch of the sex of the first child" in 4
replace v_name = "% Republican" in 5
replace v_name = "treatment effect (biological)" in 6
replace v_name = "treatment effect (cohab)" in 7

forvalues i = 0/2 {
	* Every statistic in column `i' uses the same sample restriction.
	local insample s_sample >= `i' & ~missing(s_sample)

	* row 1: number of observations in the sample
	sum s_sample if `insample'
	replace v_sample`i' = r(N) in 1

	* row 2: mean of fx_bdaughter
	mean fx_bdaughter if `insample'
	replace v_sample`i' = _b[fx_bdaughter] in 2

	* row 3: mean of fx_daughter
	mean fx_daughter if `insample'
	replace v_sample`i' = _b[fx_daughter] in 3

	* row 4: mismatch rate; left blank for the i == 0 column.
	* NOTE(review): the mean of f_mismatch is multiplied by 2 as in the
	* original code -- confirm the intended definition of this statistic.
	if `i' > 0 {
		mean f_mismatch if `insample'
		replace v_sample`i' = _b[f_mismatch] * 2 in 4
	}

	* row 5: mean of d_rep
	mean d_rep if `insample'
	replace v_sample`i' = _b[d_rep] in 5

	* rows 6-7: difference in mean d_rep between indicator == 1 and
	* indicator == 0, first for fx_bdaughter (row 6), then fx_daughter (row 7)
	local row = 6
	foreach d in fx_bdaughter fx_daughter {
		mean d_rep if `insample' & `d' == 1
		local mu_daughter = _b[d_rep]
		mean d_rep if `insample' & `d' == 0
		local mu_son = _b[d_rep]
		replace v_sample`i' = `mu_daughter'-`mu_son' in `row'
		local ++row
	}
}

* Export only the labelled rows. The -if- qualifier must precede the options.
outsheet v_* using tab1_measurement_error.csv if v_name != "", comma replace


* ------------ table 2
* Four OLS models of d_repscale, restricted to cnum == 36 and year == 1994.
* Models using fx_bdaughter add x_bageoldest; models using fx_daughter add
* x_oldage. Only the two first-daughter coefficients are exported.
estimates clear

local cv1  c_female c_age c_born c_educ
local base cnum == 36 & year == 1994
local both a_sample == 1 & b_sample == 1

* m1: fx_bdaughter, b_sample
regress d_repscale fx_bdaughter `cv1' x_bageoldest if `base' & b_sample == 1
estimates store m1

* m2: fx_daughter, a_sample
regress d_repscale fx_daughter `cv1' x_oldage if `base' & a_sample == 1
estimates store m2

* m3: fx_bdaughter, observations in both samples
regress d_repscale fx_bdaughter `cv1' x_bageoldest if `base' & `both'
estimates store m3

* m4: fx_daughter, observations in both samples
regress d_repscale fx_daughter `cv1' x_oldage if `base' & `both'
estimates store m4

esttab * using tab2_partyid.csv, csv replace se star(+ 0.1 * 0.05 ** 0.01) keep(fx_bdaughter fx_daughter) nogap


* -------- table 3
* Four OLS models of d_conscale100 on fx_daughter with cnum and year
* indicators, restricted to out_sample == 100 and essround == 2.
local cv1 c_female  c_born c_educ c_age x_oldage

* Despite its name, n_nonmiss holds the number of MISSING controls per
* observation (rowmiss), so n_nonmiss == 0 keeps complete cases only.
egen n_nonmiss = rowmiss(`cv1')

estimates clear
local spec   d_conscale100 i.cnum i.year fx_daughter
local sample out_sample == 100 & essround == 2

* m1: no controls
regress `spec' if `sample' & n_nonmiss == 0
estimates store m1

* m2: no controls, cldnhh == 2 only
regress `spec' if `sample' & cldnhh == 2 & n_nonmiss == 0
estimates store m2

* m3: with controls
regress `spec' `cv1' if `sample' & n_nonmiss == 0
estimates store m3

* m4: with controls, cldnhh == 2 only
regress `spec' `cv1' if `sample' & cldnhh == 2 & n_nonmiss == 0
estimates store m4

esttab * using tab3_ess.csv, csv replace se star(+ 0.1 * 0.05 ** 0.01) keep(fx_daughter) nogap


* --------- table 4
* Frequency of nonbio in the two samples.
tab nonbio if out_sample == 100
tab nonbio if ~missing(fx_bdaughter)

* For each outcome, fit four OLS models on observations with fx_bdaughter
* observed: (1) fx_bdaughter, (2) nonbio, (3) both, (4) both plus their
* interaction. d_repscale models are stored as m1-m4 and d_conscale models
* as c1-c4.
estimates clear
local cv1 c_female  c_born c_educ c_age x_bageoldest
local bio ~missing(fx_bdaughter)

local outcomes d_repscale d_conscale
local prefixes m c
forvalues k = 1/2 {
	local y : word `k' of `outcomes'
	local p : word `k' of `prefixes'

	regress `y' fx_bdaughter `cv1' if `bio'
	estimates store `p'1

	regress `y' nonbio `cv1' if `bio'
	estimates store `p'2

	regress `y' fx_bdaughter nonbio `cv1' if `bio'
	estimates store `p'3

	regress `y' fx_bdaughter nonbio c.fx_bdaughter#c.nonbio `cv1' if `bio'
	estimates store `p'4
}

esttab * using tab4_nonbio.csv, csv replace se star(+ 0.1 * 0.05 ** 0.01) keep(fx_bdaughter nonbio c.fx_bdaughter#c.nonbio) nogap

* ------------- table s2
* Marital-status indicators (missing when marital is missing).
gen c_divorced  = (marital == 3) if !missing(marital)
gen c_separated = (marital == 4) if !missing(marital)

* Two logit models on out_sample == 100 sharing the same right-hand side:
*   m1: outcome fx_bdaughter
*   m2: outcome f_mismatch
estimates clear
local cv1 c_female x_hsize childs  c_educ c_age x_oldage
local rhs fx_daughter `cv1' c_married

local k = 0
foreach y in fx_bdaughter f_mismatch {
	local ++k
	logit `y' `rhs' if out_sample == 100
	estimates store m`k'
}

esttab * using tabs2_predict.csv, csv replace nogap se star(+ 0.1 * 0.05 ** 0.01 *** 0.001)


